package edu.nctu.csie.jichang.tw2cn.tokenization;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import com.chenlb.mmseg4j.ComplexSeg;
import com.chenlb.mmseg4j.Dictionary;
import com.chenlb.mmseg4j.MMSeg;
import com.chenlb.mmseg4j.Seg;
import com.chenlb.mmseg4j.Word;

/**
 * Splits text into word tokens using the mmseg4j segmentation library.
 *
 * <p>The segmentation strategy is supplied by {@link #getSeg()}, which
 * subclasses may override to plug in a different {@link Seg}.
 */
public class Tokenization {

	/** Dictionary handed to the segmenter created by {@link #getSeg()}. */
	protected Dictionary dic;

	/**
	 * Creates a tokenizer backed by the dictionary returned from
	 * {@link Dictionary#getInstance()}.
	 */
	public Tokenization() {
		this.dic = Dictionary.getInstance();
	}

	/**
	 * Builds the segmenter used for one tokenization run.
	 *
	 * @return a new {@link ComplexSeg} constructed over {@link #dic}
	 */
	protected Seg getSeg() {
		return new ComplexSeg(this.dic);
	}

	/**
	 * Tokenizes the given text.
	 *
	 * @param pContent the text to segment
	 * @return the string of every word produced by the segmenter, in the
	 *         order the segmenter emitted them
	 * @throws RuntimeException wrapping any exception raised while creating
	 *         the segmenter or reading words from it
	 */
	public List<String> doExecute(String pContent) {
		List<String> tokens = new ArrayList<String>();
		try {
			// The segmenter is requested before the reader is built so an
			// overriding getSeg() runs in the same position as it always has.
			Seg segmenter = getSeg();
			StringReader source = new StringReader(pContent);
			MMSeg tokenizer = new MMSeg(source, segmenter);
			// next() signals the end of input by returning null.
			for (Word current = tokenizer.next(); current != null; current = tokenizer.next()) {
				tokens.add(current.getString());
			}
			return tokens;
		} catch (Exception failure) {
			// Every failure, checked or unchecked, is surfaced as a RuntimeException.
			throw new RuntimeException(failure);
		}
	}
}